'''
    Nobel Laureates, from 1901 to 2023

    This file contains the code to create the dataset 
    hosted on Harvard Dataverse. 

    The data is sourced from the Nobel Prize API. For
    more information, see https://www.nobelprize.org/about/developer-zone-2/.

    OpenAPI documentation: https://app.swaggerhub.com/apis/NobelMedia/NobelMasterData/2.1
'''
import requests
import pandas as pd


def process_api_page(df, prizes):
    '''
    Appends one row per laureate for every prize on an API page.

    Parameters:
        df (list): rows collected so far; new rows are appended in place.
        prizes (list): prize objects (dicts) taken from the 'nobelPrizes'
            array of an API response.

    Returns:
        list: the same ``df`` object. Each appended row has the layout
        [award year, category, laureate name, motivation, date awarded,
        prize amount, adjusted prize amount].

    Prizes without a 'laureates' key are skipped. A missing 'dateAwarded'
    becomes an empty string; a missing 'prizeAmount' or
    'prizeAmountAdjusted' becomes None.
    '''
    for prize in prizes:
        # skip the prize if no laureate was recorded
        if 'laureates' not in prize:
            continue
        date_awarded = prize.get('dateAwarded', "")
        prize_amount = prize.get('prizeAmount')
        prize_amount_adj = prize.get('prizeAmountAdjusted')
        for laureate in prize['laureates']:
            # use 'knownName' when present, otherwise fall back to 'orgName'
            name_key = 'knownName' if 'knownName' in laureate else 'orgName'
            # flatten line breaks so that each row stays on a single line
            motivation = laureate['motivation']['en']
            motivation = motivation.replace("\r", " ").replace("\n", " ")
            df.append([
                int(prize['awardYear']),
                prize['category']['en'],
                laureate[name_key]['en'],
                motivation,
                date_awarded,
                # int(None) raises TypeError, so keep None for absent amounts
                None if prize_amount is None else int(prize_amount),
                None if prize_amount_adj is None else int(prize_amount_adj),
            ])
    return df

def _format_tsv_field(value):
    '''
    Renders a single cell of the output file.

    Strings are wrapped in double quotes, with embedded double quotes
    doubled so the field stays parseable. None becomes an empty field.
    Any other value is written with str().
    '''
    if value is None:
        return ""
    if isinstance(value, str):
        escaped = value.replace('"', '""')
        return f"\"{escaped}\""
    return str(value)


def retrieve_nobel_prize_data(offset=0, limit=25, output_path="nobel_prizes.tsv", timeout=30):
    '''
    Collate Nobel Prize data into a single dataset

    Pages through the nobelPrizes endpoint of the Nobel Prize API, starting
    at ``offset`` and requesting ``limit`` prizes per call, and writes the
    collected rows to ``output_path`` as a tab-separated file (UTF-8, header
    line first, no trailing newline).

    Parameters:
        offset (int): index of the first prize to request.
        limit (int): number of prizes requested per API call.
        output_path (str): path of the file to write.
        timeout (float): seconds to wait for each HTTP response before
            requests raises a timeout error.

    If a request returns a status other than 200, a message is printed,
    paging stops, and the rows collected so far are still written.
    '''
    base_url = "https://masterdataapi.nobelprize.org/2.1/nobelPrizes"
    headers = ['award_year', 'category', 'laureate', 'motivation', 'date_awarded', 'prize_amount', 'prize_amount_adjusted']
    rows = []
    while True:
        url = f"{base_url}?offset={offset}&limit={limit}"
        # without a timeout a stalled connection would block the script forever
        res = requests.get(url, timeout=timeout)
        if res.status_code != 200:
            print(f"Unable to make request to URL {url}")
            break
        raw_data = res.json()
        metadata = raw_data["meta"]
        rows = process_api_page(rows, raw_data['nobelPrizes'])
        # advance using the paging values echoed back by the API
        offset = metadata['offset'] + metadata['limit']
        if offset >= metadata['count']:
            break
    # write output to a tab-separated file
    lines = ['\t'.join(headers)]
    lines.extend('\t'.join(_format_tsv_field(value) for value in row) for row in rows)
    with open(output_path, 'w', encoding='utf-8') as outfile:
        outfile.write('\n'.join(lines))
        
# Build the dataset with the default paging and output path when this file
# is executed as a script (nothing runs on import).
if __name__ == "__main__":
    retrieve_nobel_prize_data()
